Todo:

ghg_emissions_clean <- read_csv("data/clean_data/ghg_emissions_summary.csv")

-- Column specification ---------------------------------------------------------------------------------------
cols(
  ccp_mapping = col_character(),
  pollutant = col_character(),
  year = col_double(),
  value = col_double(),
  units = col_character()
)


summarised <- group_and_summarise_exclude(ghg_emissions_clean, c("year", "value", "units"))


summarised %>% 
  ggplot() +
  aes(x = .data[["ccp_mapping"]], y = value, fill = .data[["ccp_mapping"]]) +
  geom_col()

new_ulevs %>% 
  filter(!statistic == "Proportion of New Vehicle Registrations that are ULEV") %>% 
  # write_csv("data/clean_data/transport/newly_registered_vehicles_and_ulevs.csv")
  group_by(year, statistic) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  summarise(value = lag(value)/value, .groups = 'drop_last') %>% 
  drop_na() %>% 
  ggplot() +
  aes(x = year, y = value) +
  geom_line() +
  geom_point()

base_var = "year"
base_value = 2018
base_modifier = 0.8


road_traffic %>% 
  filter(vehicle_type == "Cars") %>% 
  group_by(year) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>%
  ggplot() +
  aes(x = year, y = value) +
  geom_line() +
  geom_point() +
  geom_point(aes(x = 2030, y = (.data[["value"]][.data[[base_var]] == base_value])*base_modifier),
             colour = "darkgreen", shape = 13, size = 5) +
  ylim(0, NA)

road_traffic %>% 
  filter(.data[["year"]] == 2018,
         .data[["vehicle_type"]] == "Cars") %>%
  summarise(value = sum(.data[["value"]]))
sales <- data.frame(
  year = c(2005, 2006, 2007, 2008),
  profit = c(340, 500, 600, 550)
)

sales %>% 
  ggplot() +
  aes(x = year, y = profit) +
  geom_line() +
  geom_point(aes(x = 2010, y = (.data[['profit']][.data[["year"]] == 2008]*1.1)))


pull(sales[sales[["year"]] == 2008, ]["profit"])*1.1
[1] 605
remove <- c("year", "value", "units")
temp <- names(ghg_emissions_clean)

remaining_cols <- temp [!temp %in% remove]

ghg_emissions_clean %>%
  dplyr::group_by_at(remaining_cols) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  
summarised_data <- emissions_data %>% 
  group_and_summarise(ccp_mapping, pollutant)
#' Draw a stacked bar chart of `value` by the first column of `df`
#'
#' @param df A data frame whose first column is the category to plot and
#'   which has a `value` column. Defaults to `summarised_data`.
#' @return A ggplot object.
create_bar_plot <- function(df = summarised_data) {
  x_name <- names(df)[1]

  df %>%
    ggplot() +
    # `x_name` holds a column *name*; `.data[[...]]` looks the column up so the
    # aesthetics are mapped to its values rather than to the literal string.
    aes(x = .data[[x_name]], y = value, fill = .data[[x_name]]) +
    geom_col(position = "stack", show.legend = FALSE)
}
test_function <- function(df, var) {
  # convert snakecase variable name to title for ui
  varname <- str_to_title(str_replace_all(var, "_", " "))
  
}
#' Build a Shiny slider spanning the range of the supplied values
#'
#' @param df Values passed to `range()`; presumably a numeric vector or an
#'   all-numeric data frame -- TODO confirm against callers.
#' @param varname Human-readable variable name used in the slider label.
#' @param input_id Input id for the slider. Defaults to `varname`.
#' @return The UI element created by `sliderInput()`.
build_slider_input <- function(df, varname, input_id = varname) {
  rng <- range(df, na.rm = TRUE)

  # The id used to be the bare name `var`, which is not an argument of this
  # function, so it was looked up outside it (ending at `stats::var` when no
  # other object of that name exists) instead of naming the input.
  sliderInput(
    inputId = input_id,
    label = paste0(varname, " Range:"),
    min = rng[1],
    max = rng[2],
    value = rng,
    sep = "",
    step = 1,
    ticks = FALSE
  )
}
  
test_function(df = emissions_data, var = "pollutant")
[1] "pickerInput"
substr("jewel", 2, nchar("jewel"))
[1] "ewel"
test <- "ccp_mapping\",\"pollutant"

eval(parse(text=gsub("\\", "", deparse(test), fixed=TRUE)))
Error in parse(text = gsub("\\", "", deparse(test), fixed = TRUE)) : 
  <text>:1:14: unexpected ','
1: "ccp_mapping",
                 ^
test_function <- function(df, var) {
  # convert snakecase variable name to title for ui
  varname <- str_to_title(str_replace_all(var, "_", " "))
  
  print(dropdown_lookup[[var]])
}
  
emissions_data %>% 
  group_by_("ccp_mapping", "pollutant") %>% 
  summarise(value = sum(value, na.rm = TRUE))
`summarise()` regrouping output by 'ccp_mapping' (override with `.groups` argument)
emissions_data %>% 
  group_by(ccp_mapping, pollutant) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  ggplot() +
  aes(x = names(emissions_data)[1], y = value) +
  geom_col()

emissions_data <- read_csv("data/clean_data/ghg_emissions.csv")

-- Column specification ------------------------------------------------------------------------------------------------------------------------
cols(
  ccp_mapping = col_character(),
  source_name = col_character(),
  pollutant = col_character(),
  year = col_double(),
  value = col_double(),
  units = col_character()
)
emissions_data %>%
  group_by(year) %>% 
  summarise(value = sum(value, na.rm = TRUE)) %>% 
  ggplot() +
    aes(x = year,
        y = value) +
  geom_line() +
  ylim(0, NA)
`summarise()` ungrouping output (override with `.groups` argument)

Only emissions -> keep rows whose value is greater than or equal to 0 (negative values are sinks)

ghg_true_emissions <- ghg_emissions_clean %>% 
  filter(year == max(ghg_emissions_clean$year)) %>% 
  filter(value >= 0) %>% 
  mutate(across(where(is.character), ~str_to_title(.)))
# need to split by pollutant and year
ghg_emissions_clean %>% 
  select(ccp_mapping, source_name) %>% 
  filter(str_detect(source_name, paste("^", ccp_mapping, sep = ""))) %>% 
  unique()
ghg_wide_emissions <- ghg_wide %>%
  filter(!value < 0)

ghg_wide_sinks <- ghg_wide %>% 
  filter(value < 0)

#' List the hierarchy ("child") columns of a data frame
#'
#' Returns the names of `df` that are neither in `additional_vars` nor in
#' `standard_vars`, in their original order.
#'
#' @param df A data frame (anything with `names()`).
#' @param additional_vars Character vector of column names to exclude.
#' @param standard_vars Character vector of further column names to exclude.
#' @return A character vector of the remaining column names.
get_child_cols <- function(df,
                           additional_vars,
                           standard_vars = c("value", "units")) {
  col_names <- names(df)
  excluded <- c(additional_vars, standard_vars)

  # Return the value itself rather than ending on an assignment, which would
  # return it invisibly and print nothing when called interactively.
  col_names[!col_names %in% excluded]
}


child_cols <- get_child_cols(ghg_wide_emissions, additional_vars)

child_cols
[1] "child_order_0" "child_order_1" "child_order_2" "child_order_3"
previous_children
NULL
create_child_table(ghg_wide_emissions,
                   current_child = "child_order_0",
                   previous_children = NULL,
                   additional_vars = c("pollutant", "year"))
Error: Problem with `mutate()` input `..1`.
x Input `..1` must be a vector, not a `quosure/formula` object.
i Input `..1` is `current_child`.
Run `rlang::last_error()` to see where the error occurred.
previous_children <- NULL

children <- c(previous_children, "child_order_0")

children
[1] "child_order_0"
road_traffic <- read_csv("data/clean_data/transport/road_traffic.csv")

-- Column specification -----------------------------------------------------------------------------------------------------------------------
cols(
  road_type = col_character(),
  vehicle_type = col_character(),
  year = col_double(),
  value = col_double(),
  units = col_character()
)


road_traffic %>% 
  filter(road_type == "Major roads (M and A)") %>%
  filter(!str_detect(vehicle_type, "^All")) %>% 
  filter(year == 2018) %>% 
  ggplot() +
  aes(x = .data[["vehicle_type"]], y = value, fill = .data[["vehicle_type"]]) +
  geom_col() +
  ylim(0, NA)

# Early tidy-eval draft for building one level of the hierarchy table: group
# `df` by the current child, the previous children and the additional
# variables, sum `value`, then derive a `parent` column with unite().
# NOTE(review): the calls recorded in this notebook fail for this version with
# "Input `..1` must be a vector, not a `quosure/formula` object" -- presumably
# because quo() wraps the local argument names rather than the columns the
# caller supplied; confirm before reusing.
# NOTE(review): `id` is selected on the last line, but neither unite() call
# names its output column `id` (the second passes no `col =`) -- verify.
create_child_table <- function(df, current_child, previous_children, additional_vars) {
  current_child <- quo(current_child)
  previous_children <- quo(previous_children)
  additional_vars <- quo(additional_vars)
  
  df %>% 
    group_by(!!current_child, rev(!!previous_children), !!additional_vars) %>% 
    summarise(value = sum(value), .groups = 'drop') %>% 
    unite(!!previous_children, col = "parent", sep = " - ") %>% 
    unite(c(!!previous_children, !!current_child), sep = " - ") %>% 
    select(id, label = !!current_child, parent, !!additional_vars, value)
}
names(ghg_wide_emissions) %in% "child_order_[0-9]"
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
  
temp <- names(ghg_wide_emissions)
remove <- c(additional_vars, "value", "units")


temp [!temp %in% remove]
[1] "child_order_0" "child_order_1" "child_order_2" "child_order_3"

TODO:

dashboard - review page dashboard - transport explorer

create_child_table(ghg_wide_emissions, current_child = "child_order_0",
                   previous_children = "", additional_vars = c("pollutant", "year"))
Error: Invalid index: out of bounds
create_child_table(ghg_wide_emissions, current_child = child_order_0,
                   previous_children = NULL, additional_vars = c(pollutant, year))
Error: Problem with `mutate()` input `..1`.
x Input `..1` must be a vector, not a `quosure/formula` object.
i Input `..1` is `current_child`.
Run `rlang::last_error()` to see where the error occurred.
if (is.null(previous_children)) {
  print("bean")
}
[1] "bean"
vector <- c("child_1", "child_2")

paste(paste(vector, collapse = " - "),"child_3", sep = " - ")
[1] "child_1 - child_2 - child_3"

df %>% group_by(current_child, rev(previous_children), additional_vars) %>% summarise(value = sum(value), .groups = "drop") %>% mutate(id = paste(previous_children, current_child, sep = " - "), parent = paste(previous_children, sep = " - ")) %>% select(id, label = current_child, parent, additional_vars, value)

vector <- c()

paste(paste(vector, sep = " - "),"child_3", sep = " - ")
[1] " - child_3"
vector <- c()

tibble(
  parent = c("bean", "curd", "whey", "sprout", ""),
  people = c("david", "sasha", "john", "smith", "delilah"),
  shapes = c("triangle", "rectangle", "square", "diamond", "")
) %>% 
  unite(c(1,2,3), col = "id", sep = " - ", remove = FALSE)
tibble(
  parent = c("dad", "bean", "", "fava"),
  child = c("")
) %>% 
  select(child) %>% 
  pull()
[1] "" "" "" ""
# get sector totals for parent df (top level)

ghg_sector_totals <- ghg_true_emissions %>% 
  group_by(ccp_mapping) %>% 
  summarise(sector_total = sum(value), .groups = "drop_last")

parent_df <- ghg_sector_totals %>% 
  mutate(parent = "") %>% 
  select(label = ccp_mapping,
         parent,
         value = sector_total)

# the rest of the data

children_df <- ghg_true_emissions %>% 
  filter(year == max(ghg_true_emissions$year)) %>% 
  group_by(source_name, ccp_mapping) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  select(label = source_name,
         parent = ccp_mapping,
         value)

# combine into one hierarchical df, create id column

emissions_long <- bind_rows(list(parent_df, children_df)) %>% 
  mutate(id = paste(parent, label, sep = " - "), .before = 'label') %>% 
  mutate(id = str_remove(id, "^ - "))

Other method - separate columns

parents <- emissions_wide %>% 
  filter(is.na(second)) %>% 
  select(id, label, parent, value)
first_born <- emissions_wide %>% 
  filter(!is.na(second)) %>% 
  group_by(second, first) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  mutate(id = paste(first, second, sep = " - ")) %>% 
  ungroup() %>% 
  select(id,
         label = second,
         parent = first,
         value)
second_born <- emissions_wide %>% 
  filter(!is.na(third)) %>% 
  group_by(third, second, first) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  mutate(id = paste(first, second, third, sep = " - ")) %>% 
  mutate(parent = paste(first, second, sep = " - ")) %>% 
  ungroup() %>% 
  select(id,
         label = third,
         parent,
         value)
third_born <- emissions_wide %>% 
  filter(!is.na(fourth)) %>% 
  group_by(fourth, third, second, first) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  mutate(id = paste(first, second, third, fourth, sep = " - ")) %>% 
  mutate(parent = paste(first, second, third, sep = " - ")) %>% 
  ungroup() %>% 
  select(id,
         label = fourth,
         parent,
         value)
n_potential_child_layers <- emissions_long %>% 
  select(id) %>% 
  pull() %>% 
  str_count(" - ") %>% 
  max()
# the number of potential child layers is the maximum included in the dataset
n_potential_child_layers <- emissions_long %>% 
  select(id) %>% 
  pull() %>% 
  str_count(" - ") %>% 
  max()
#' Count the deepest level of nesting encoded in an id column
#'
#' Work-in-progress helper: at present it only computes the number of
#' potential child layers; the hierarchy table itself is not built yet.
#'
#' @param df Data frame containing the id column.
#' @param id_col Name (string) of the column holding the ids.
#' @param id_sep Pattern separating the levels inside an id, as understood by
#'   `str_count()`.
#' @return The largest number of separators found in any id.
create_hierarchy_df <- function(df, id_col = "id", id_sep = " - ") {
  # Use the function's own arguments: the earlier draft referred to `col` and
  # `sep`, which are not parameters of this function.
  ids <- df[[id_col]]

  # The id with the most separators sets the number of potential child layers.
  n_potential_child_layers <- max(str_count(ids, id_sep))

  n_potential_child_layers
}
fig <- plot_ly()

fig <- fig %>%
  add_trace(
    name = "Emissions",
    ids = emissions$id,
    labels = emissions$label,
    parents = emissions$parent,
    values = emissions$value,
    text = emissions$units,
    type = 'sunburst',
    maxdepth = 2,
    domain = list(column = 0),
    branchvalues = 'total',
    insidetextorientation = 'radial',
    marker=list(colorscale='Viridis'),
    text = ~units,
    textinfo='label+percent root+value',
    hoverinfo = paste("%{label}: <br>%{value}",'text')
    ) 
fig <- fig %>%
  add_trace(
    name = "Sinks",
    ids = sinks$id,
    labels = sinks$label,
    parents = sinks$parent,
    values = sinks$value,
    text = sinks$units,
    type = 'sunburst',
    maxdepth = 2,
    domain = list(column = 1),
    branchvalues = 'total',
    insidetextorientation = 'radial',
    marker=list(colorscale='Viridis'),
    text = ~units,
    textinfo='label+percent root+value',
    hoverinfo = paste("%{label}: <br>%{value}",'text')
  ) 
fig <- fig %>%
    layout(
      grid = list(columns =2, rows = 1),
      margin = list(l = 0, r = 0, b = 0, t = 0))

fig
tibble(
  labels = c("Eve", "Seth", "Enos", "Noam", "Awan", "Enoch"),
  parents = c("", "Eve", "Seth", "Seth", "Eve", "Awan"),
  values = c(16, 12, 10, 2, 4, 4)
)
fig <- plot_ly(
  labels = cain_ble$labels,
  parents = cain_ble$parents,
  values = cain_ble$values,
  type = 'sunburst'
)

fig
emissions_long$label[1:10]
 [1] "Agriculture"            "Electricity Generation" "Industry"               "Land use"               "Residential"           
 [6] "Services"               "Transport"              "Waste"                  "Accidental fires"       "Accidental fires"      
d <- data.frame(
    ids = c(
    "North America", "Europe", "Australia", "North America - Football", "Soccer",
    "North America - Rugby", "Europe - Football", "Rugby",
    "Europe - American Football","Australia - Football", "Association",
    "Australian Rules", "Autstralia - American Football", "Australia - Rugby",
    "Rugby League", "Rugby Union"
  ),
  labels = c(
    "North<br>America", "Europe", "Australia", "Football", "Soccer", "Rugby",
    "Football", "Rugby", "American<br>Football", "Football", "Association",
    "Australian<br>Rules", "American<br>Football", "Rugby", "Rugby<br>League",
    "Rugby<br>Union"
  ),
  parents = c(
    "", "", "", "North America", "North America", "North America", "Europe",
    "Europe", "Europe","Australia", "Australia - Football", "Australia - Football",
    "Australia - Football", "Australia - Football", "Australia - Rugby",
    "Australia - Rugby"
  ),
  stringsAsFactors = FALSE
)

fig <- plot_ly(d, ids = ~ids, labels = ~labels, parents = ~parents, type = 'sunburst')

d
class(emissions_long)
[1] "tbl_df"     "tbl"        "data.frame"
class(cain_ble)
[1] "tbl_df"     "tbl"        "data.frame"
class(cain_ble)
diff_order <- ghg_emissions_clean %>% 
  dplyr::group_by(ccp_mapping, year, pollutant) %>% 
  dplyr::summarise(value = sum(value), units = units[1], .groups = "keep") %>%
  filter(pollutant == "CO2") %>% 
  filter(year == min(ghg_emissions_clean$year) |
           year == max(ghg_emissions_clean$year)) %>% 
  ungroup() %>% 
  group_by(ccp_mapping) %>% 
  mutate(diff = lag(value) - value) %>% 
  arrange(diff) %>%
  drop_na() %>% 
  select(ccp_mapping) %>% 
  pull()
ghg_emissions_clean %>% 
  dplyr::group_by(ccp_mapping, year, pollutant) %>% 
  dplyr::summarise(value = sum(value), units = units[1], .groups = "keep") %>%
  filter(pollutant == "CO2") %>%
  mutate(ccp_mapping = factor(ccp_mapping, levels = rev(diff_order))) %>% 
  ggplot() +
  aes(x = year, y = value, fill = ccp_mapping) +
  geom_area() +
  facet_wrap(~pollutant) +
  theme_bw()
ghg_emissions_clean %>% 
  group_by(pollutant) %>% 
  summarise(emissions = sum(value))
plotly::ggplotly(
  ghg_emissions_clean %>% 
    filter(year == 2018) %>%
    filter(pollutant %in% c("CO2", "CH4")) %>%
    ggplot() +
    aes(x = factor(ccp_mapping, levels = rev(levels(factor(ccp_mapping)))),
        y = value, fill = source_name,
        text = paste0('</br> Sector: ', ccp_mapping,
                      '</br> Emissions: ', value,
                      '</br> Source Name: ', source_name)) +
    geom_col(position = "stack") +
    theme_bw() +
    theme(legend.position = "none") +
    labs(x = "Sector",
         y = paste0("Emissions (", ghg_emissions_clean$units[1], ")")) +
    coord_flip(),
    tooltip = 'text'
  )
ghg_emissions_data %>% 
  names()
ghg_emissions_data %>% 
  select()
input <- list()

input$col_choice = "national_communication_categories"
ghg_emissions_clean %>%
  group_by_(input$col_choice, "emission_year") %>% 
  summarise(total_ghg_emissions = sum(emissions)) %>% 
  ggplot() +
  aes(x = EmissionYear, y = total_ghg_emissions, group = `National Communication Categories`, colour = `National Communication Categories`) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1990,2020,5)) +
  theme(legend.position = 0)
ghg_emissions_data %>% 
  distinct(`National Communication Categories`)
ghg_emissions_data %>% 
  filter(EmissionYear != "BaseYear") %>% 
  mutate(EmissionYear = as.numeric(EmissionYear)) %>% 
  group_by(EmissionYear) %>% 
  summarise(total_ghg_emissions = sum(`Emissions (MtCO2e)`)) %>% 
  ggplot() +
  aes(x = EmissionYear, y = total_ghg_emissions) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1990,2020,5)) +
  ylim(0, 80) +
  theme(legend.position = 0) +
  theme_bw()
ghg_emissions_data %>% 
  distinct(`CCP mapping`)
ghg_emissions_data %>% 
  filter(EmissionYear != "BaseYear") %>% 
  mutate(EmissionYear = as.numeric(EmissionYear)) %>% 
  group_by(`CCP mapping`, EmissionYear) %>% 
  summarise(total_ghg_emissions = sum(`Emissions (MtCO2e)`)) %>% 
  ggplot() +
  aes(x = EmissionYear, y = total_ghg_emissions, group = `CCP mapping`, colour = `CCP mapping`) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1990,2020,5))
`summarise()` regrouping output by 'CCP mapping' (override with `.groups` argument)

ghg_emissions_data %>% 
  filter(EmissionYear != "BaseYear") %>% 
  mutate(EmissionYear = as.numeric(EmissionYear)) %>% 
  filter(`National Communication Categories` != `CCP mapping`) %>% 
  select(`National Communication Categories`, `CCP mapping`) %>% 
  unique()
emissions_sankey <- emissions_data %>% 
  select(ccp_mapping, source_name, pollutant, emission_year, emissions, units)

category_id,category_name,subcategory_id,subcategory_name,year,emissions,emission

filtered_df <- ghg_emissions_clean %>% 
  select(ccp_mapping, source_name, pollutant, emission_year, emissions, units) %>% 
  filter(pollutant == "CO2") %>% 
  filter(emission_year == "2005")
total_emissions_for_gas <- filtered_df %>% 
  summarise(sum(emissions)) %>% 
  pull()

total_emissions_by_category <- filtered_df %>% 
  select(-pollutant) %>% 
  group_by(ccp_mapping) %>% 
  summarise(cat_sum = sum(emissions), .groups = 'drop_last')
categories <- filtered_df %>% 
  distinct(ccp_mapping) %>% 
  pull()

n_categories <- length(categories)

sources <- filtered_df %>% 
  distinct(source_name) %>% 
  pull()

n_sources <- length(sources)
n_sources
[1] 159
node_names <- c("Total", categories, sources, "Other")

node_names_df <- data.frame("name" = node_names)

total_sankey_tibble <- total_emissions_by_category %>%
  mutate(total = "Total") %>% 
  mutate(total = match(total, node_names) -1) %>% 
  mutate(ccp_mapping = match(ccp_mapping, node_names) -1) %>% 
  select(source = total,
         target = ccp_mapping,
         value = cat_sum)

total_filtered_emissions <- total_sankey_tibble %>% 
  summarise(sum(value)) %>% 
  pull()

other_emissions <- total_emissions_for_gas - total_filtered_emissions

total_other_sankey_tibble <- tibble(
  "source" = c(0),
  "target" = (match("Other", node_names) -1),
  "value" = c(other_emissions)
)


sub_sankey_tibble <- filtered_df %>% 
  select(- c(units, pollutant, emission_year)) %>% 
  mutate(ccp_mapping = match(ccp_mapping, node_names) -1,
         source_name = match(source_name, node_names) -1)

names(sub_sankey_tibble) = c("source", "target", "value")

sankey_tibble <- total_sankey_tibble %>% 
  bind_rows(sub_sankey_tibble) %>% 
  bind_rows(total_other_sankey_tibble)

links_matrix <- data.frame(as.matrix(sankey_tibble, byrow = TRUE, ncols = 3))

# Add a 'group' column to each connection:
links <- links_matrix %>% 
  mutate(group = case_when(
    source == 0 ~ paste("type_", target, sep = ""),
    source!=0 ~ paste("type_", source, sep = "")
  ))

nodes <- node_names_df
# Add a 'group' column to each node.
# All of them in the same group to make them the same colour
nodes$group <- as.factor(c("my_unique_group"))

emissions <- list()

emissions$nodes <- nodes
emissions$links <- links
node_names <- c("Total", categories, sources, "Other")

node_names_df <- data.frame("name" = node_names)

total_sankey_tibble <- total_emissions_by_category %>%
  mutate(total = "Total") %>% 
  mutate(total = match(total, node_names) -1) %>% 
  mutate(ccp_mapping = match(ccp_mapping, node_names) -1) %>% 
  select(source = total,
         target = ccp_mapping,
         value = cat_sum)

total_filtered_emissions <- total_sankey_tibble %>% 
  summarise(sum(value)) %>% 
  pull()

other_emissions <- total_emissions_for_gas - total_filtered_emissions

total_other_sankey_tibble <- tibble(
  "source" = c(0),
  "target" = (match("Other", node_names) -1),
  "value" = c(other_emissions)
)


sub_sankey_tibble <- filtered_df %>% 
  select(- c(units, pollutant, emission_year)) %>% 
  mutate(ccp_mapping = match(ccp_mapping, node_names) -1,
         source_name = match(source_name, node_names) -1)

names(sub_sankey_tibble) = c("source", "target", "value")

sankey_tibble <- total_sankey_tibble %>% 
  bind_rows(sub_sankey_tibble) %>% 
  bind_rows(total_other_sankey_tibble)

links_matrix <- data.frame(as.matrix(sankey_tibble, byrow = TRUE, ncols = 3))

# Add a 'group' column to each connection:
links <- links_matrix %>% 
  mutate(group = case_when(
    source == 0 ~ paste("type_", target, sep = ""),
    source!=0 ~ paste("type_", source, sep = "")
  ))

nodes <- node_names_df
# Add a 'group' column to each node.
# All of them in the same group to make them the same colour
nodes$group <- as.factor(c("my_unique_group"))

emissions <- list()

emissions$nodes <- nodes
emissions$links <- links
total_filtered_emissions <- total_sankey_tibble %>% 
  summarise(sum(value)) %>% 
  pull()

other_emissions <- total_emissions_for_gas - total_filtered_emissions

total_other_sankey_tibble <- tibble(
  "source" = c(0),
  "target" = (match("Other", node_names) -1),
  "value" = c(other_emissions)
)

sub_sankey_tibble <- filtered_tibble %>% 
  select(-category_id, -subcategory_id, -year) %>% 
  mutate(category_name = match(category_name, node_names) -1,
         subcategory_name = match(subcategory_name, node_names) -1)

names(sub_sankey_tibble) = c("source", "target", "value")

sankey_tibble <- total_sankey_tibble %>% 
  bind_rows(sub_sankey_tibble) %>% 
  bind_rows(total_other_sankey_tibble)

links_matrix <- data.frame(as.matrix(sankey_tibble, byrow = TRUE, ncols = 3))

# Add a 'group' column to each connection:
links <- links_matrix %>% 
  mutate(group = case_when(
    source == 0 ~ paste("type_", target, sep = ""),
    source!=0 ~ paste("type_", source, sep = "")
  ))

nodes <- node_names_df
# Add a 'group' column to each node.
# All of them in the same group to make them the same colour
nodes$group <- as.factor(c("my_unique_group"))

emissions <- list()

emissions$nodes <- nodes
emissions$links <- links
#' Build the node and link tables for an emissions Sankey diagram
#'
#' Filters `data` to one gas and one year, keeps the rows whose emissions
#' exceed the resolution threshold, and lays them out as
#' Total -> category -> subcategory links. Emissions removed by the threshold
#' are routed from "Total" to a single "Other" node.
#'
#' @param data Data frame with columns `category_name`, `subcategory_name`,
#'   `year`, `emissions` and `emission` (the gas).
#' @param userYear,userGas Year and gas to keep. Each may be a plain value or
#'   a zero-argument function (for example a Shiny reactive).
#' @param userResolution Threshold a row's emissions must exceed to be drawn
#'   individually; plain value or zero-argument function. When `NULL`, a
#'   function called `userResolution` defined alongside this function is used.
#' @return A list with `nodes` (columns `name`, `group`) and `links` (columns
#'   `source`, `target`, `value`, `group`); `source` and `target` are
#'   zero-based positions in `nodes`.
make_sankey_dfs <- function(data, userYear, userGas, userResolution = NULL) {
  # Accept either a plain value or a zero-argument function, so the same code
  # works with reactives and with literal arguments.
  resolve <- function(x) if (is.function(x)) x() else x

  if (is.null(userResolution)) {
    # The earlier body called `userResolution()` without receiving it as an
    # argument; keep honouring a function of that name visible from where this
    # function was defined when the caller does not pass a threshold.
    userResolution <- get0(
      "userResolution",
      envir = parent.env(environment()),
      mode = "function"
    )
  }
  if (is.null(userResolution)) {
    stop("`userResolution` must be supplied.", call. = FALSE)
  }

  selected_year <- resolve(userYear)
  selected_gas <- resolve(userGas)
  min_emissions <- resolve(userResolution)

  gas_year_rows <- data %>%
    filter(emission == selected_gas, year == selected_year)

  total_emissions_for_gas <- sum(gas_year_rows[["emissions"]])

  kept_rows <- gas_year_rows %>%
    filter(emissions > min_emissions)

  categories <- unique(kept_rows[["category_name"]])
  subcategories <- unique(kept_rows[["subcategory_name"]])
  node_names <- c("Total", categories, subcategories, "Other")

  # Links refer to nodes by zero-based position in `node_names`.
  node_index <- function(x) match(x, node_names) - 1

  total_links <- kept_rows %>%
    group_by(category_name) %>%
    summarise(value = sum(emissions), .groups = "drop") %>%
    transmute(
      source = node_index("Total"),
      target = node_index(category_name),
      value = value
    )

  # Columns are picked by name, so extra or reordered columns in `data` cannot
  # silently end up as source/target/value.
  subcategory_links <- kept_rows %>%
    transmute(
      source = node_index(category_name),
      target = node_index(subcategory_name),
      value = emissions
    )

  # Whatever the threshold removed still belongs to the gas total.
  other_link <- tibble(
    source = node_index("Total"),
    target = node_index("Other"),
    value = total_emissions_for_gas - sum(total_links$value)
  )

  # Add a 'group' column to each connection: links leaving "Total" are grouped
  # by their target, all other links by their source.
  links <- bind_rows(total_links, subcategory_links, other_link) %>%
    as.data.frame() %>%
    mutate(group = case_when(
      source == 0 ~ paste0("type_", target),
      source != 0 ~ paste0("type_", source)
    ))

  nodes <- data.frame(name = node_names)
  # All nodes share one group to make them the same colour.
  nodes$group <- factor("my_unique_group")

  list(nodes = nodes, links = links)
}
make_sankey_dfs(emissions_sankey, userYear = 2005, userGas = "CH4", userResolution = 50)
---
title: "R Notebook"
output: html_notebook
---
# Todo:

- add columns for totals for each pollutant/sector

```{r}
# load libraries and read data
library(tidyverse)
library(readxl)
library(plotly)

ghg_emissions_clean <- read_csv("data/clean_data/ghg_emissions_summary.csv")
```

```{r}


summarised <- group_and_summarise_exclude(ghg_emissions_clean, c("year", "value", "units"))


summarised %>% 
  ggplot() +
  aes(x = .data[["ccp_mapping"]], y = value, fill = .data[["ccp_mapping"]]) +
  geom_col()
```


```{r}
new_ulevs %>% 
  filter(!statistic == "Proportion of New Vehicle Registrations that are ULEV") %>% 
  # write_csv("data/clean_data/transport/newly_registered_vehicles_and_ulevs.csv")
  group_by(year, statistic) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  summarise(value = lag(value)/value, .groups = 'drop_last') %>% 
  drop_na() %>% 
  ggplot() +
  aes(x = year, y = value) +
  geom_line() +
  geom_point()
```


```{r}
base_var = "year"
base_value = 2018
base_modifier = 0.8


road_traffic %>% 
  filter(vehicle_type == "Cars") %>% 
  group_by(year) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>%
  ggplot() +
  aes(x = year, y = value) +
  geom_line() +
  geom_point() +
  geom_point(aes(x = 2030, y = (.data[["value"]][.data[[base_var]] == base_value])*base_modifier),
             colour = "darkgreen", shape = 13, size = 5) +
  ylim(0, NA)
```

```{r}
road_traffic %>% 
  filter(.data[["year"]] == 2018,
         .data[["vehicle_type"]] == "Cars") %>%
  summarise(value = sum(.data[["value"]]))
```
```{r}
# Base-R subset of the 2018 values for cars.
# Both conditions must index columns of `road_traffic`; comparing the literal
# string "vehicle_type" with "Cars" is always FALSE and selects no rows.
road_traffic[
  road_traffic[["year"]] == 2018 & road_traffic[["vehicle_type"]] == "Cars",
]["value"]
```


```{r}
sales <- data.frame(
  year = c(2005, 2006, 2007, 2008),
  profit = c(340, 500, 600, 550)
)

# Plot yearly profit and mark a point for 2010 at 1.1 times the 2008 profit.
sales %>%
  ggplot() +
  aes(x = year, y = profit) +
  geom_line() +
  # The inner `.data[[...]]` needs a column name; an empty subscript is an
  # error. Select `profit`, then keep the value from the 2008 row.
  geom_point(
    aes(x = 2010, y = .data[["profit"]][.data[["year"]] == 2008] * 1.1)
  )

pull(sales[sales[["year"]] == 2008, ]["profit"])*1.1
```

```{r}

```


```{r}
# Group by every column except the ones listed in `remove`, then total `value`
# within each remaining combination.
remove <- c("year", "value", "units")
temp <- names(ghg_emissions_clean)

remaining_cols <- temp[!temp %in% remove]

# The chain must end on its last verb: a trailing pipe leaves the expression
# incomplete and the chunk cannot be parsed.
ghg_emissions_clean %>%
  group_by(across(all_of(remaining_cols))) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = "drop_last")
```

```{r}
summarised_data <- emissions_data %>% 
  group_and_summarise(ccp_mapping, pollutant)
```

```{r}
# Placeholder for the bar-plot helper: for now it only looks up the name of
# the first column of `df` (default: `summarised_data`) and hands it back
# invisibly; no plot is drawn yet.
create_bar_plot <- function(df = summarised_data) {
  first_column <- names(df)[1]
  invisible(first_column)
}
```





```{r}
# Turn a snake_case variable name into a Title Case label for the UI and
# return it invisibly. `df` is accepted but not used yet.
test_function <- function(df, var) {
  spaced <- str_replace_all(var, "_", " ")
  invisible(str_to_title(spaced))
}
```


```{r}

```


```{r}
#' Build a Shiny slider spanning the range of the supplied values
#'
#' @param df Values passed to `range()`; presumably a numeric vector or an
#'   all-numeric data frame -- TODO confirm against callers.
#' @param varname Human-readable variable name used in the slider label.
#' @param input_id Input id for the slider. Defaults to `varname`.
#' @return The UI element created by `sliderInput()`.
build_slider_input <- function(df, varname, input_id = varname) {
  rng <- range(df, na.rm = TRUE)

  # The id used to be the bare name `var`, which is not an argument of this
  # function, so it was looked up outside it (ending at `stats::var` when no
  # other object of that name exists) instead of naming the input.
  sliderInput(
    inputId = input_id,
    label = paste0(varname, " Range:"),
    min = rng[1],
    max = rng[2],
    value = rng,
    sep = "",
    step = 1,
    ticks = FALSE
  )
}
  
```

```{r}
test_function(df = emissions_data, var = "pollutant")
```



```{r}
substr("jewel", 2, nchar("jewel"))
```


```{r}
test <- "ccp_mapping\",\"pollutant"

# Split the string on its embedded `","` separators to recover the individual
# column names. Rebuilding R source with deparse() and evaluating it does not
# parse for this input, and evaluating constructed text is best avoided anyway.
strsplit(test, "\",\"", fixed = TRUE)[[1]]
```
```{r}
paste0(c("first", "second"), collapse = ",")
```


```{r}
# Group by columns given as strings. `group_by_()` is deprecated, so the
# columns are selected with across(all_of(...)) instead.
emissions_data %>%
  group_by(across(all_of(c("ccp_mapping", "pollutant")))) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = "drop_last")
```



```{r}
# Total `value` per ccp_mapping and pollutant, drawn as bars along the first
# column of `emissions_data`.
emissions_data %>%
  group_by(ccp_mapping, pollutant) %>%
  summarise(value = sum(value, na.rm = TRUE), .groups = "drop_last") %>%
  ggplot() +
  # `names()` needs an object to inspect; take the first column name of
  # `emissions_data` and map the column itself via `.data[[...]]` rather than
  # the name as a constant string.
  aes(x = .data[[names(emissions_data)[1]]], y = value) +
  geom_col()
```
```{r}

```


```{r}
emissions_data <- read_csv("data/clean_data/ghg_emissions.csv")
```

```{r}
emissions_data %>%
  group_by(year) %>% 
  summarise(value = sum(value, na.rm = TRUE)) %>% 
  ggplot() +
    aes(x = year,
        y = value) +
  geom_line() +
  ylim(0, NA)
```


Only emissions -> keep rows whose value is greater than or equal to 0

```{r}
ghg_true_emissions <- ghg_emissions_clean %>% 
  filter(year == max(ghg_emissions_clean$year)) %>% 
  filter(value >= 0) %>% 
  mutate(across(where(is.character), ~str_to_title(.)))
```

```{r}
# need to split by pollutant and year
ghg_emissions_clean %>% 
  select(ccp_mapping, source_name) %>% 
  filter(str_detect(source_name, paste("^", ccp_mapping, sep = ""))) %>% 
  unique()
```

```{r}
# var_names


```

```{r}

#' List the hierarchy ("child") columns of a data frame
#'
#' Returns the names of `df` that are neither in `additional_vars` nor in
#' `standard_vars`, in their original order.
#'
#' @param df A data frame (anything with `names()`).
#' @param additional_vars Character vector of column names to exclude.
#' @param standard_vars Character vector of further column names to exclude.
#' @return A character vector of the remaining column names.
get_child_cols <- function(df,
                           additional_vars,
                           standard_vars = c("value", "units")) {
  col_names <- names(df)
  excluded <- c(additional_vars, standard_vars)

  # Return the value itself rather than ending on an assignment, which would
  # return it invisibly and print nothing when called interactively.
  col_names[!col_names %in% excluded]
}


child_cols <- get_child_cols(ghg_wide_emissions, additional_vars)

child_cols
```

```{r}
child_cols <- c("child_order_0", child_names)

previous_children <- NULL

# Build one table per hierarchy level, then stack them once at the end.
level_tables <- vector("list", length(child_cols))

for (i in seq_along(child_cols)) {
  child <- child_cols[[i]]
  print(child)
  level_tables[[i]] <- create_child_table(
    ghg_wide_emissions,
    current_child = child,
    previous_children = previous_children,
    additional_vars = c("pollutant", "year")
  )
  # Each level becomes an ancestor of the levels that follow it.
  previous_children <- c(previous_children, child)
}

# The result of bind_rows() has to be assigned; piping into it without storing
# the value leaves `hierarchy_df` empty.
hierarchy_df <- bind_rows(level_tables)

hierarchy_df
```



```{r}

```





```{r}
create_child_table(ghg_wide_emissions,
                   current_child = "child_order_0",
                   previous_children = NULL,
                   additional_vars = c("pollutant", "year"))
```


```{r}
previous_children <- NULL

children <- c(previous_children, "child_order_0")

children
```



```{r}

```
```{r}
road_traffic <- read_csv("data/clean_data/transport/road_traffic.csv")
```

```{r}
# One-off clean-up of the road traffic data: collapse every vehicle type that
# starts with "All traffic" into "All Traffic" and title-case the rest, set a
# `units` column, rename `vehicle_kilometers_millions` to `value`, and write
# the result to data/clean_data/transport/road_traffic.csv.
# NOTE(review): that is the same path `road_traffic` is read from elsewhere in
# this notebook, so this overwrites its own input; once the file already has a
# `value` column the rename() presumably fails on a re-run -- confirm before
# running again.
road_traffic %>%
  mutate(vehicle_type = ifelse(str_detect(vehicle_type, "^All traffic"), "All Traffic", str_to_title(vehicle_type))) %>% 
  mutate(units = "Vehicle Kilometers (Millions)") %>% 
  rename(value = vehicle_kilometers_millions) %>% 
  write_csv("data/clean_data/transport/road_traffic.csv")
  
```


```{r}
road_traffic %>% 
  filter(road_type == "Major roads (M and A)") %>%
  filter(!str_detect(vehicle_type, "^All")) %>% 
  filter(year == 2018) %>% 
  ggplot() +
  aes(x = .data[["vehicle_type"]], y = value, fill = .data[["vehicle_type"]]) +
  geom_col() +
  ylim(0, NA)
```


```{r}
road_traffic %>% 
  filter(!str_detect(road_type, "^All")) %>% 
  filter(!str_detect(vehicle_type, "^All")) %>%
  # write_csv()
  group_by(year) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  ggplot() +
  aes(x = year, y = value) +
  geom_line()
```

```{r}

```



```{r}
# Build one level of the emissions hierarchy as an id / label / parent table.
#
# Arguments:
#   df                 Data frame containing a `value` column plus every
#                      column named in the arguments below.
#   current_child      Name (string) of the column holding this level's labels.
#   previous_children  Character vector naming the ancestor-level columns,
#                      outermost first, or NULL for the top level.
#   additional_vars    Character vector of extra grouping columns carried
#                      through to the output, e.g. c("pollutant", "year").
#
# Returns a tibble with columns id, label, parent, <additional_vars>, value.
# `value` is summed within each combination of the level columns and
# `additional_vars`. For the top level `parent` is "" and `id` equals the
# label; otherwise `parent` and `id` are the ancestor labels (plus the
# current label for `id`) joined with " - ".
create_child_table <- function(df, current_child, previous_children, additional_vars) {
  group_cols <- c(current_child, rev(previous_children), additional_vars)

  # Summarise the data frame that was passed in. The earlier version read the
  # global `ghg_wide_emissions` here and silently ignored `df`.
  level_totals <- df %>%
    group_by(across(all_of(group_cols))) %>%
    summarise(value = sum(value), .groups = "drop")

  if (is.null(previous_children)) {
    # Top level: no ancestors, so the id is just the label itself.
    level_totals %>%
      mutate(id = .data[[current_child]],
             label = .data[[current_child]],
             parent = "") %>%
      select(id, label, parent, all_of(additional_vars), value)
  } else {
    level_totals %>%
      unite(all_of(previous_children),
            col = "parent", sep = " - ", remove = FALSE) %>%
      unite(all_of(c(previous_children, current_child)),
            col = "id", sep = " - ", remove = FALSE) %>%
      select(id, label = all_of(current_child), parent,
             all_of(additional_vars), value)
  }
}
```


```{r}
names(ghg_wide_emissions) %in% "child_order_[0-9]"
  
temp <- names(ghg_wide_emissions)
remove <- c(additional_vars, "value", "units")


temp [!temp %in% remove]
```

TODO:

dashboard - review page
dashboard - transport explorer


```{r}
help(magrittr)
```


```{r}
create_child_table(ghg_wide_emissions, current_child = "child_order_0",
                   previous_children = "", additional_vars = c("pollutant", "year"))
```


```{r}
create_child_table(ghg_wide_emissions, current_child = child_order_0,
                   previous_children = NULL, additional_vars = c(pollutant, year))
```

```{r}
# Step-by-step version of the create_child_table() body for one level, using
# fixed inputs so intermediate results can be inspected.
current_child <- "child_order_1"
previous_children <- "child_order_0"
additional_vars <- c("pollutant", "year")

# group_by(across(all_of(...))) replaces the deprecated group_by_(.dots = ...).
hierarchy_df <- ghg_wide_emissions %>%
  group_by(across(all_of(c(current_child,
                           rev(previous_children),
                           additional_vars)))) %>%
  summarise(value = sum(value), .groups = "drop")

if (is.null(previous_children)) {
  # Top level: no ancestors, so the id is just the label itself.
  hierarchy_df %>%
    mutate(id = .data[[current_child]],
           label = .data[[current_child]],
           parent = "") %>%
    select(id, label, parent, all_of(additional_vars), value)
} else {
  # all_of() makes it explicit that these are character vectors of column
  # names rather than columns of the data frame.
  hierarchy_df %>%
    unite(all_of(previous_children),
          col = "parent", sep = " - ", remove = FALSE) %>%
    unite(all_of(c(previous_children, current_child)),
          col = "id", sep = " - ", remove = FALSE) %>%
    select(id, label = all_of(current_child), parent,
           all_of(additional_vars), value)
}
  
```

```{r}
if (is.null(previous_children)) {
  print("bean")
}
```




```r
df %>%
  group_by(current_child, rev(previous_children), additional_vars) %>% 
  summarise(value = sum(value), .groups = "drop") %>% 
  mutate(id = paste(previous_children, current_child, sep = " - "),
         parent = paste(previous_children, sep = " - ")) %>% 
  select(id, label = current_child, parent, additional_vars, value)
```

```{r}
vector <- c("child_1", "child_2")

paste(paste(vector, collapse = " - "),"child_3", sep = " - ")
```

```{r}
vector <- c()

paste(paste(vector, sep = " - "),"child_3", sep = " - ")
```


```{r}
vector <- c()

tibble(
  parent = c("bean", "curd", "whey", "sprout", ""),
  people = c("david", "sasha", "john", "smith", "delilah"),
  shapes = c("triangle", "rectangle", "square", "diamond", "")
) %>% 
  unite(c(1,2,3), col = "id", sep = " - ", remove = FALSE)
```


```{r}
tibble(
  parent = c("dad", "bean", "", "fava"),
  child = c("")
) %>% 
  select(child) %>% 
  pull()
```


```{r}
# Build a two-level hierarchy (sector -> source) for the latest year.

# Restrict to the latest year once, so sector (parent) totals and source
# (child) values cover the same period. Previously the sector totals were
# summed over every year while the children were filtered to the latest
# year, so a parent's value did not equal the sum of its children.
latest_year_emissions <- ghg_true_emissions %>%
  filter(year == max(year))

# get sector totals for parent df (top level)

ghg_sector_totals <- latest_year_emissions %>%
  group_by(ccp_mapping) %>%
  summarise(sector_total = sum(value), .groups = "drop")

parent_df <- ghg_sector_totals %>%
  mutate(parent = "") %>%
  select(label = ccp_mapping,
         parent,
         value = sector_total)

# the rest of the data

children_df <- latest_year_emissions %>%
  group_by(source_name, ccp_mapping) %>%
  summarise(value = sum(value), .groups = "drop") %>%
  select(label = source_name,
         parent = ccp_mapping,
         value)

# combine into one hierarchical df, create id column
# Top-level rows have parent "", so the leading " - " is stripped from
# their ids.

emissions_long <- bind_rows(list(parent_df, children_df)) %>%
  mutate(id = paste(parent, label, sep = " - "), .before = "label") %>%
  mutate(id = str_remove(id, "^ - "))
```

# Other method - separate column

```{r}
emissions_wide <- emissions_long %>% 
  separate(id, " - ", into = c("first", "second", "third", "fourth"),
           remove = FALSE, extra = "merge", fill = "right")
```

```{r}
parents <- emissions_wide %>% 
  filter(is.na(second)) %>% 
  select(id, label, parent, value)
```

```{r}
first_born <- emissions_wide %>% 
  filter(!is.na(second)) %>% 
  group_by(second, first) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  mutate(id = paste(first, second, sep = " - ")) %>% 
  ungroup() %>% 
  select(id,
         label = second,
         parent = first,
         value)
```

```{r}
second_born <- emissions_wide %>% 
  filter(!is.na(third)) %>% 
  group_by(third, second, first) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  mutate(id = paste(first, second, third, sep = " - ")) %>% 
  mutate(parent = paste(first, second, sep = " - ")) %>% 
  ungroup() %>% 
  select(id,
         label = third,
         parent,
         value)
```

```{r}
third_born <- emissions_wide %>% 
  filter(!is.na(fourth)) %>% 
  group_by(fourth, third, second, first) %>% 
  summarise(value = sum(value), .groups = "drop_last") %>% 
  mutate(id = paste(first, second, third, fourth, sep = " - ")) %>% 
  mutate(parent = paste(first, second, third, sep = " - ")) %>% 
  ungroup() %>% 
  select(id,
         label = fourth,
         parent,
         value)
```

```{r}
# the number of potential child layers is the maximum included in the dataset
n_potential_child_layers <- emissions_long %>% 
  select(id) %>% 
  pull() %>% 
  str_count(" - ") %>% 
  max()
```



```{r}
# Count the deepest nesting level encoded in a hierarchical id column.
#
# Arguments:
#   df      Data frame holding the id column.
#   id_col  Name (string) of the id column; defaults to "id".
#   id_sep  Separator placed between levels inside an id; defaults to " - ".
#           It is matched literally, not as a regular expression.
#
# Returns the largest number of `id_sep` occurrences found in any id.
#
# NOTE(review): the function name suggests it will eventually build the whole
# hierarchy data frame; at present only the layer count is computed.
create_hierarchy_df <- function(df, id_col = "id", id_sep = " - ") {
  # Use the function's own arguments: the earlier body referred to `col` and
  # `sep`, which are not parameters of this function.
  ids <- df[[id_col]]
  n_potential_child_layers <- max(str_count(ids, fixed(id_sep)))

  n_potential_child_layers
}
```



```{r}
```


```{r}
emissions_long <- bind_rows(list(parents, first_born, second_born, third_born)) %>% 
  mutate(units = ghg_emissions_clean$units[1]) %>% 
  write_csv("data/clean_data/hierarchical_data.csv")
```



```{r}
emissions_long %>%
  data.frame(stringsAsFactors = FALSE) %>% 
  plot_ly(
    ids = ~id,
    labels = ~label,
    parents = ~parent,
    values = ~value,
    type = "treemap",
    branchvalues = "total",
    maxdepth = 2,
    textinfo='label+percent root+entry'
  )
```


```{r}
filtered_ghg_hierchary <- ghg_hierchary %>% 
  filter(year == 1990) %>%
  filter(pollutant %in% c("CO2", "N2O"))

emissions <- filtered_ghg_hierchary %>% 
  filter(!value < 0) %>% 
  group_by(id, label, parent) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  mutate(units = "megatonnes of CO2 equivelant")

sinks <- filtered_ghg_hierchary %>% 
  filter(value < 0) %>% 
  mutate(value = value * -1) %>% 
  group_by(id, label, parent) %>% 
  summarise(value = sum(value, na.rm = TRUE), .groups = 'drop_last') %>% 
  mutate(units = "megatonnes of CO2 equivelant")

# Sunburst of emissions by hierarchy level, two levels deep.
emissions %>%
  data.frame(stringsAsFactors = FALSE) %>%
  plot_ly(
    ids = ~id,
    labels = ~label,
    parents = ~parent,
    values = ~value,
    type = "sunburst",
    maxdepth = 2,
    insidetextorientation = 'radial',
    marker = list(colorscale = 'Viridis'),
    text = ~units,
    textinfo = 'label+percent root+value',
    # A %{...} template belongs in `hovertemplate`; `hoverinfo` only accepts
    # a flag list such as "label+value+text". <extra></extra> hides the
    # secondary trace-name box.
    hovertemplate = "%{label}: <br>%{value} %{text}<extra></extra>"
  )
```


```{r}

```



```{r}
plot_ly(
  labels = c("Eve", "Seth", "Enos", "Noam", "Awan", "Enoch"),
  parents = c("", "Eve", "Seth", "Seth", "Eve", "Awan"),
  values = c(16, 12, 10, 2, 4, 4),
  type = "sunburst",
  branchvalues = "total"
)
```

```{r}
tibble(
  labels = c("Eve", "Seth", "Enos", "Noam", "Awan", "Enoch"),
  parents = c("", "Eve", "Seth", "Seth", "Eve", "Awan"),
  values = c(16, 12, 10, 2, 4, 4)
)
```

```{r}
emissions_long
```


```{r}
fig <- plot_ly(
  labels = cain_ble$labels,
  parents = cain_ble$parents,
  values = cain_ble$values,
  type = 'sunburst'
)

fig
```


```{r}
emissions_long$label[1:10]
```


```{r}
d <- data.frame(
    ids = c(
    "North America", "Europe", "Australia", "North America - Football", "Soccer",
    "North America - Rugby", "Europe - Football", "Rugby",
    "Europe - American Football","Australia - Football", "Association",
    "Australian Rules", "Autstralia - American Football", "Australia - Rugby",
    "Rugby League", "Rugby Union"
  ),
  labels = c(
    "North<br>America", "Europe", "Australia", "Football", "Soccer", "Rugby",
    "Football", "Rugby", "American<br>Football", "Football", "Association",
    "Australian<br>Rules", "American<br>Football", "Rugby", "Rugby<br>League",
    "Rugby<br>Union"
  ),
  parents = c(
    "", "", "", "North America", "North America", "North America", "Europe",
    "Europe", "Europe","Australia", "Australia - Football", "Australia - Football",
    "Australia - Football", "Australia - Football", "Australia - Rugby",
    "Australia - Rugby"
  ),
  stringsAsFactors = FALSE
)

fig <- plot_ly(d, ids = ~ids, labels = ~labels, parents = ~parents, type = 'sunburst')

d
```

```{r}
class(emissions_long)
```
```{r}
class(cain_ble)
```


```{r}
diff_order <- ghg_emissions_clean %>% 
  dplyr::group_by(ccp_mapping, year, pollutant) %>% 
  dplyr::summarise(value = sum(value), units = units[1], .groups = "keep") %>%
  filter(pollutant == "CO2") %>% 
  filter(year == min(ghg_emissions_clean$year) |
           year == max(ghg_emissions_clean$year)) %>% 
  ungroup() %>% 
  group_by(ccp_mapping) %>% 
  mutate(diff = lag(value) - value) %>% 
  arrange(diff) %>%
  drop_na() %>% 
  select(ccp_mapping) %>% 
  pull()
```


```{r}
ghg_emissions_clean %>% 
  dplyr::group_by(ccp_mapping, year, pollutant) %>% 
  dplyr::summarise(value = sum(value), units = units[1], .groups = "keep") %>%
  filter(pollutant == "CO2") %>%
  mutate(ccp_mapping = factor(ccp_mapping, levels = rev(diff_order))) %>% 
  ggplot() +
  aes(x = year, y = value, fill = ccp_mapping) +
  geom_area() +
  facet_wrap(~pollutant) +
  theme_bw()
```
```{r}
ghg_emissions_clean %>% 
  group_by(pollutant) %>% 
  summarise(emissions = sum(value))
```


```{r}
plotly::ggplotly(
  ghg_emissions_clean %>% 
    filter(year == 2018) %>%
    filter(pollutant %in% c("CO2", "CH4")) %>%
    ggplot() +
    aes(x = factor(ccp_mapping, levels = rev(levels(factor(ccp_mapping)))),
        y = value, fill = source_name,
        text = paste0('</br> Sector: ', ccp_mapping,
                      '</br> Emissions: ', value,
                      '</br> Source Name: ', source_name)) +
    geom_col(position = "stack") +
    theme_bw() +
    theme(legend.position = "none") +
    labs(x = "Sector",
         y = paste0("Emissions (", ghg_emissions_clean$units[1], ")")) +
    coord_flip(),
    tooltip = 'text'
  )

```


```{r}
ghg_emissions_data %>% 
  names()
```

```{r}
ghg_emissions_data %>% 
  select()
```



```{r}
input <- list()

input$col_choice = "national_communication_categories"
```



```{r}
# Total emissions per year, one line per level of the column chosen in
# input$col_choice.
ghg_emissions_clean %>%
  # group_by(across(all_of(...))) replaces the deprecated group_by_().
  group_by(across(all_of(c(input$col_choice, "emission_year")))) %>%
  summarise(total_ghg_emissions = sum(emissions), .groups = "drop") %>%
  ggplot() +
  # Map the columns that actually exist after summarise(): the grouping used
  # `emission_year` and input$col_choice, not the raw `EmissionYear` /
  # `National Communication Categories` names.
  aes(x = emission_year, y = total_ghg_emissions,
      group = .data[[input$col_choice]],
      colour = .data[[input$col_choice]]) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1990, 2020, 5)) +
  # "none" is the documented way to hide the legend; 0 is not a valid position.
  theme(legend.position = "none")

```{r}
ghg_emissions_data %>% 
  distinct(`National Communication Categories`)
```
```{r}
# Total emissions per year across all categories; the "BaseYear" rows are
# dropped so that EmissionYear can be converted to a number.
ghg_emissions_data %>%
  filter(EmissionYear != "BaseYear") %>%
  mutate(EmissionYear = as.numeric(EmissionYear)) %>%
  group_by(EmissionYear) %>%
  summarise(total_ghg_emissions = sum(`Emissions (MtCO2e)`)) %>%
  ggplot() +
  aes(x = EmissionYear, y = total_ghg_emissions) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1990, 2020, 5)) +
  ylim(0, 80) +
  # Apply the complete theme first: theme_bw() replaces every theme setting,
  # so a theme() tweak placed before it is discarded.
  theme_bw() +
  theme(legend.position = "none")
```


```{r}
ghg_emissions_data %>% 
  distinct(`CCP mapping`)
```

```{r}

```


```{r}
ghg_emissions_data %>% 
  distinct(`National Communication Categories`)
```
```{r}
ghg_emissions_data
```
```{r}
ghg_emissions_data %>% 
  filter(EmissionYear != "BaseYear") %>% 
  mutate(EmissionYear = as.numeric(EmissionYear)) %>% 
  group_by(`CCP mapping`, EmissionYear) %>% 
  summarise(total_ghg_emissions = sum(`Emissions (MtCO2e)`)) %>% 
  ggplot() +
  aes(x = EmissionYear, y = total_ghg_emissions, group = `CCP mapping`, colour = `CCP mapping`) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1990,2020,5))
```

```{r}
ghg_emissions_data %>% 
  filter(EmissionYear != "BaseYear") %>% 
  mutate(EmissionYear = as.numeric(EmissionYear)) %>% 
  filter(`National Communication Categories` != `CCP mapping`) %>% 
  select(`National Communication Categories`, `CCP mapping`) %>% 
  unique()
```
category_id,category_name,subcategory_id,subcategory_name,year,emissions,emission

```{r}
emissions_sankey <- emissions_data %>% 
  select(ccp_mapping, source_name, pollutant, emission_year, emissions, units)
```


```{r}
ghg_emissions_clean
```


```{r}
filtered_df <- ghg_emissions_clean %>% 
  select(ccp_mapping, source_name, pollutant, emission_year, emissions, units) %>% 
  filter(pollutant == "CO2") %>% 
  filter(emission_year == "2005")
```

```{r}
total_emissions_for_gas <- filtered_df %>% 
  summarise(sum(emissions)) %>% 
  pull()

total_emissions_by_category <- filtered_df %>% 
  select(-pollutant) %>% 
  group_by(ccp_mapping) %>% 
  summarise(cat_sum = sum(emissions), .groups = 'drop_last')
```

```{r}
categories <- filtered_df %>% 
  distinct(ccp_mapping) %>% 
  pull()

n_categories <- length(categories)

sources <- filtered_df %>% 
  distinct(source_name) %>% 
  pull()

n_sources <- length(sources)
```

```{r}
n_sources
```


```{r}
node_names <- c("Total", categories, sources, "Other")

node_names_df <- data.frame("name" = node_names)

total_sankey_tibble <- total_emissions_by_category %>%
  mutate(total = "Total") %>% 
  mutate(total = match(total, node_names) -1) %>% 
  mutate(ccp_mapping = match(ccp_mapping, node_names) -1) %>% 
  select(source = total,
         target = ccp_mapping,
         value = cat_sum)

total_filtered_emissions <- total_sankey_tibble %>% 
  summarise(sum(value)) %>% 
  pull()

other_emissions <- total_emissions_for_gas - total_filtered_emissions

total_other_sankey_tibble <- tibble(
  "source" = c(0),
  "target" = (match("Other", node_names) -1),
  "value" = c(other_emissions)
)


sub_sankey_tibble <- filtered_df %>% 
  select(- c(units, pollutant, emission_year)) %>% 
  mutate(ccp_mapping = match(ccp_mapping, node_names) -1,
         source_name = match(source_name, node_names) -1)

names(sub_sankey_tibble) = c("source", "target", "value")

sankey_tibble <- total_sankey_tibble %>% 
  bind_rows(sub_sankey_tibble) %>% 
  bind_rows(total_other_sankey_tibble)

links_matrix <- data.frame(as.matrix(sankey_tibble, byrow = TRUE, ncols = 3))

# Add a 'group' column to each connection:
links <- links_matrix %>% 
  mutate(group = case_when(
    source == 0 ~ paste("type_", target, sep = ""),
    source!=0 ~ paste("type_", source, sep = "")
  ))

nodes <- node_names_df
# Add a 'group' column to each node.
# All of them in the same group to make them the same colour
nodes$group <- as.factor(c("my_unique_group"))

emissions <- list()

emissions$nodes <- nodes
emissions$links <- links


```






```{r}
total_filtered_emissions <- total_sankey_tibble %>% 
  summarise(sum(value)) %>% 
  pull()

other_emissions <- total_emissions_for_gas - total_filtered_emissions

total_other_sankey_tibble <- tibble(
  "source" = c(0),
  "target" = (match("Other", node_names) -1),
  "value" = c(other_emissions)
)

sub_sankey_tibble <- filtered_tibble %>% 
  select(-category_id, -subcategory_id, -year) %>% 
  mutate(category_name = match(category_name, node_names) -1,
         subcategory_name = match(subcategory_name, node_names) -1)

names(sub_sankey_tibble) = c("source", "target", "value")

sankey_tibble <- total_sankey_tibble %>% 
  bind_rows(sub_sankey_tibble) %>% 
  bind_rows(total_other_sankey_tibble)

links_matrix <- data.frame(as.matrix(sankey_tibble, byrow = TRUE, ncols = 3))

# Add a 'group' column to each connection:
links <- links_matrix %>% 
  mutate(group = case_when(
    source == 0 ~ paste("type_", target, sep = ""),
    source!=0 ~ paste("type_", source, sep = "")
  ))

nodes <- node_names_df
# Add a 'group' column to each node.
# All of them in the same group to make them the same colour
nodes$group <- as.factor(c("my_unique_group"))

emissions <- list()

emissions$nodes <- nodes
emissions$links <- links
```








```{r}
# Build the node and link tables for a three-tier Sankey diagram:
# "Total" -> category -> subcategory, plus an "Other" node.
#
# Arguments:
#   data            Data frame with columns category_name, subcategory_name,
#                   year, emission (the gas) and emissions (the amount).
#   userYear        Year to keep: a value, or a zero-argument function
#                   returning one (e.g. a Shiny reactive).
#   userGas         Gas to keep: a value, or a zero-argument function
#                   returning one.
#   userResolution  Rows are kept only when emissions > userResolution: a
#                   value, or a zero-argument function returning one.
#                   Defaults to 0.
#
# Returns a list with two data frames:
#   nodes  columns name, group (every node shares one group, hence one colour)
#   links  columns source, target (zero-based positions in nodes$name),
#          value, group
# The "Total" -> "Other" link carries the selected gas/year emissions that
# the resolution filter removed.
make_sankey_dfs <- function(data, userYear, userGas, userResolution = 0) {
  # Accept plain values as well as reactive-style functions. The earlier body
  # always called userYear()/userGas(), and used userResolution() although it
  # was not a parameter, so a call passing plain values failed.
  resolve <- function(x) if (is.function(x)) x() else x
  selected_year <- resolve(userYear)
  selected_gas <- resolve(userGas)
  min_emissions <- resolve(userResolution)

  gas_year_data <- data %>%
    filter(emission == selected_gas, year == selected_year)

  total_emissions_for_gas <- sum(gas_year_data$emissions)

  filtered_tibble <- gas_year_data %>%
    filter(emissions > min_emissions)

  total_emissions_by_cat <- filtered_tibble %>%
    group_by(category_name) %>%
    summarise(cat_sum = sum(emissions), .groups = "drop")

  categories <- unique(filtered_tibble$category_name)
  subcategories <- unique(filtered_tibble$subcategory_name)

  node_names <- c("Total", categories, subcategories, "Other")

  # Sankey links refer to nodes by zero-based position, hence the "- 1".
  node_index <- function(x) match(x, node_names) - 1

  # Tier 1: Total -> each category.
  total_sankey_tibble <- total_emissions_by_cat %>%
    transmute(source = node_index("Total"),
              target = node_index(category_name),
              value = cat_sum)

  # Whatever the resolution filter removed is shown as Total -> Other.
  other_emissions <- total_emissions_for_gas - sum(total_sankey_tibble$value)

  total_other_sankey_tibble <- tibble(
    source = 0,
    target = node_index("Other"),
    value = other_emissions
  )

  # Tier 2: category -> subcategory. Columns are picked by name rather than
  # by dropping id columns and renaming whatever remains by position.
  sub_sankey_tibble <- filtered_tibble %>%
    transmute(source = node_index(category_name),
              target = node_index(subcategory_name),
              value = emissions)

  sankey_tibble <- bind_rows(total_sankey_tibble,
                             sub_sankey_tibble,
                             total_other_sankey_tibble)

  # Add a 'group' column to each connection: links leaving "Total" are
  # grouped by their target, all other links by their source.
  links <- as.data.frame(sankey_tibble) %>%
    mutate(group = if_else(source == 0,
                           paste0("type_", target),
                           paste0("type_", source)))

  # All nodes share one group so that they are drawn in the same colour.
  nodes <- data.frame(name = node_names)
  nodes$group <- as.factor("my_unique_group")

  list(nodes = nodes, links = links)
}
```


```{r}
make_sankey_dfs(emissions_sankey, userYear = 2005, userGas = "CH4", userResolution = 50)
```
```{r}

```

